home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
EnigmA Amiga Run 1996 June
/
EnigmA AMIGA RUN 08 (1996)(G.R. Edizioni)(IT)[!][issue 1996-06][EARSAN CD VII].iso
/
earcd
/
texts
/
mpindx50.lha
/
MPIndex50Src.lha
/
sh
/
MakeMPIndex.c
next >
Wrap
C/C++ Source or Header
|
1996-05-10
|
15KB
|
777 lines
// MPIndex - AmigaGuide Indexing program
// Copyright (C) © 1996 Mark John Paddock
// This program is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2 of the License, or
// any later version.
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
// You should have received a copy of the GNU General Public License
// along with this program; if not, write to the Free Software
// Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
// mark@topic.demon.co.uk
// mpaddock@cix.compulink.co.uk
// Change following line to #define MPINDEXPRINT 1 to allow printing (for debugging)
#undef MPINDEXPRINT
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <proto/dos.h>
#include <proto/exec.h>
#include <exec/memory.h>
// NOTE(review): __oslibversion and __stack look like settings picked up by the
// compiler's startup code (minimum OS library version / stack size) - confirm
// against the compiler documentation.
extern long __oslibversion=39;
extern long __stack = 16000;
// One entry of the ignore-word tree that is searched by CheckIgnore and
// filled by AddIgnore.  Entries are ordered with stricmp on Word.
struct IgnoreWord {
// The word text (a private copy made with Mystrdup)
char *Word;
// When TRUE, CheckIgnore rejects a word that matches this entry
BOOL ReallyIgnore;
// Subtree of entries whose Word compares lower than this one
struct IgnoreWord *Left;
// Subtree of entries whose Word compares higher than this one
struct IgnoreWord *Right;
};
// Root of the ignore-word tree; set by the first AddIgnore call
struct IgnoreWord *TopIgnore;
// NOTE(review): "$VER:" looks like an embedded version tag - confirm its consumer
const char Version[]="$VER: MakeMPIndex 5.0 (10.5.96)";
// Running counter used by CountNodes to number the titled nodes
static int NodeCount = 0;
// Memory pool handle: created in main with CreatePool, used by Mycalloc
extern void *Chain = NULL;
// Second and third arguments given to CreatePool in main
#define PUDDLE 64000
#define THRESH 3000
// Result of ReadArgs in main; released with FreeArgs
struct RDArgs *rdargs = NULL;
// Allocate size bytes from the Chain pool.
// On failure the pool and the parsed arguments are released, "Out of memory"
// is printed and the program exits with code 10, so callers never see NULL.
APTR
Mycalloc(ULONG size) {
	APTR block = AllocPooled(Chain,size);

	if (!block) {
		// Nothing sensible can be done without memory: clean up and quit
		DeletePool(Chain);
		FreeArgs(rdargs);
		printf("Out of memory\n");
		exit(10);
	}
	return block;
}
// Return a copy of str held in memory obtained from Mycalloc.
APTR
Mystrdup(UBYTE *str) {
	char *copy = Mycalloc(strlen(str)+1);

	return strcpy(copy,str);
}
// Connects one node with one word.  Every Link sits on two singly linked
// lists at once: the list of its node and the list of its word.
struct Link {
// The node this link belongs to
struct MyNode *MyNode;
// The word this link belongs to
struct Word *Word;
// Next link on the node's list (kept in word order by LinkNodeWord)
struct Link *NextNodeLink;
// Next link on the word's list (position chosen by LinkInWord)
struct Link *NextWordLink;
// Set to 1 on creation and incremented each time the pair is linked again
int Count;
};
// One node of the node tree.  AddNode orders the tree with stricmp on Name.
struct MyNode {
// Title text; NULL for the placeholder nodes main inserts to pre-balance the tree
char *Title;
// Node name, used as the tree key
char *Name;
// Subtree of nodes whose Name compares lower than this one
struct MyNode *Left;
// Subtree of the remaining nodes (equal names go here too)
struct MyNode *Right;
// Head of this node's list of links (chained through NextNodeLink)
struct Link *FirstLink;
// Number of links created for this node by LinkNodeWord
int Count;
// Number assigned by CountNodes; written by WriteWords, looked up by FindNode
int Num;
// Number written in front of the name by WriteNodes (file index set in main)
int seq;
};
// Root of the node tree
static struct MyNode *TopNode = NULL;
// One entry of the word tree.  AddWord orders the tree with stricmp on Word.
struct Word {
// The word text (a private copy made with Mystrdup)
char *Word;
// Subtree of words that compare lower than this one
struct Word *Left;
// Subtree of words that compare higher than this one
struct Word *Right;
// Head of this word's list of links (chained through NextWordLink)
struct Link *FirstLink;
// Number of links created for this word by LinkNodeWord
int Count;
};
// Root of the word tree
static struct Word *TopWord = NULL;
// Not referenced anywhere else in the visible part of this file
static int WordCount = 0;
// Look p up in the word tree (case-insensitively) and return its entry,
// creating and inserting a new entry when the word is not there yet.
// New entries rely on Mycalloc returning zeroed memory for their other fields.
struct Word *
AddWord(char *p) {
	struct Word **slot = &TopWord;
	int cmp;

	// Walk down until the word is found or an empty child slot is reached
	while (*slot) {
		cmp = stricmp((*slot)->Word,p);
		if (0 == cmp) {
			return *slot;
		}
		slot = (cmp > 0) ? &(*slot)->Left : &(*slot)->Right;
	}
	*slot = Mycalloc(sizeof(struct Word));
	(*slot)->Word = Mystrdup(p);
	return *slot;
}
// Insert Link1 into Word's list of links.
// The list is scanned while the title of the existing link's node compares
// greater than MyNode's title; Link1 is placed in front of the first link
// for which that is no longer true, or at the end of the list.
static void
LinkInWord(struct MyNode *MyNode,struct Word *Word,struct Link *Link1) {
	struct Link *prev = NULL;
	struct Link *cur;

	for (cur = Word->FirstLink; cur; cur = cur->NextWordLink) {
		if (stricmp(cur->MyNode->Title,MyNode->Title) <= 0) {
			break;
		}
		prev = cur;
	}
	// When appending behind an existing tail the successor field of Link1
	// is deliberately left as the caller supplied it
	if (cur || !prev) {
		Link1->NextWordLink = cur;
	}
	if (prev) {
		prev->NextWordLink = Link1;
	}
	else {
		Word->FirstLink = Link1;
	}
}
// Record one occurrence of Word in MyNode.
// If the node already has a link for the word its Count is incremented.
// Otherwise a new Link is created, inserted into the node's list (which is
// kept in ascending stricmp order of the words), both Count fields are
// incremented and the link is also added to the word's list via LinkInWord.
static void
LinkNodeWord(struct MyNode *MyNode,struct Word *Word) {
	struct Link *prev = NULL;
	struct Link *cur = MyNode->FirstLink;
	struct Link *fresh;

	while (cur && (stricmp(cur->Word->Word,Word->Word) < 0)) {
		prev = cur;
		cur = cur->NextNodeLink;
	}
	if (cur && !stricmp(cur->Word->Word,Word->Word)) {
		// Already linked: just count the repeat
		++cur->Count;
		return;
	}
	++Word->Count;
	++MyNode->Count;
	fresh = Mycalloc(sizeof(struct Link));
	fresh->MyNode = MyNode;
	fresh->Word = Word;
	fresh->Count = 1;
	// When appending behind an existing tail the successor field keeps the
	// value Mycalloc delivered
	if (cur || !prev) {
		fresh->NextNodeLink = cur;
	}
	if (prev) {
		prev->NextNodeLink = fresh;
	}
	else {
		MyNode->FirstLink = fresh;
	}
	LinkInWord(MyNode,Word,fresh);
}
// Decide whether the word p should be indexed.
// Returns TRUE when p is not in the ignore tree, or is there with
// ReallyIgnore clear; returns FALSE when a matching entry has ReallyIgnore set.
BOOL
CheckIgnore(char *p) {
	struct IgnoreWord *entry = TopIgnore;
	int cmp;

	while (entry) {
		cmp = stricmp(entry->Word,p);
		if (0 == cmp) {
			return (BOOL)!entry->ReallyIgnore;
		}
		entry = (cmp > 0) ? entry->Left : entry->Right;
	}
	// Fell out of the tree: the word is unknown, so it is indexed
	return TRUE;
}
// Split one text line into words and link every accepted word to MyNode.
//
// MyNode - node the words belong to
// buffer - NUL-terminated line; it is not modified (a local copy is cut up)
//
// An '@' followed by letters, digits, '_' or '-' is skipped as a command.
// A word starts with a letter or digit and continues over letters, digits,
// '_', '-' and '.'; trailing '.' characters are removed.  Each word that
// CheckIgnore accepts is entered with AddWord and linked with LinkNodeWord.
// Scanning stops at the end of the string or at a newline that ends a word.
static void
AddWords(struct MyNode *MyNode,char *buffer) {
	char tbuff[257];
	struct Word *Word;
	char *p,*p1,*p2;
	int Done = 0;

	// Bounded copy so an over-long line cannot overrun the work buffer
	strncpy(tbuff,buffer,sizeof(tbuff)-1);
	tbuff[sizeof(tbuff)-1] = 0;
	p = tbuff;
	while (!Done) {
		if ('@' == *p) {
			// Skip the command name that follows the '@'
			++p;
			while (*p && (isalnum((unsigned char)*p) || ('_' == *p) || ('-' == *p))) {
				++p;
			}
			if (!*p) {
				Done = 1;
			}
		}
		else if (isalnum((unsigned char)*p)) {
			// Find the end of the word.  Every test must look at *p1: testing
			// *p (the first character, always alphanumeric here) would make
			// the '_', '-' and '.' cases impossible.
			p1 = p;
			while (*p1 && (isalnum((unsigned char)*p1) || ('_' == *p1) ||
			               ('-' == *p1) || ('.' == *p1))) {
				++p1;
			}
			// Drop trailing dots; the scan stops at the leading alphanumeric
			p2 = p1;
			--p2;
			while ('.' == *p2) {
				*p2 = 0;
				--p2;
			}
			if (*p1) {
				if ('\n' == *p1) {
					Done = 1;
				}
				*p1 = 0;
			}
			else {
				Done = 1;
			}
			if (CheckIgnore(p)) {
				Word = AddWord(p);
				LinkNodeWord(MyNode,Word);
			}
			p = p1;
		}
		else if (!*p) {
			Done = 1;
		}
		++p;
	}
}
// Write the node tree to fp in tree order (left subtree, node, right subtree).
// For every node whose Count is non-zero two lines are written:
// "<seq>+<Name>" and "<Title>".
// NOTE(review): CountNodes numbers every node that has a Title, while only
// nodes with a non-zero Count are written here - confirm that the numbers
// emitted by WriteWords stay aligned when a titled node has no words.
static void
WriteNodes(struct MyNode *MyNode,FILE *fp) {
	if (!MyNode) {
		return;
	}
	WriteNodes(MyNode->Left,fp);
	if (MyNode->Count) {
		// seq is an int, so it must be printed with %d rather than %ld
		fprintf(fp,"%d+%s\n",MyNode->seq,MyNode->Name);
		fprintf(fp,"%s\n",MyNode->Title);
	}
	WriteNodes(MyNode->Right,fp);
}
// Walk the node tree in order and give every node that has a Title the next
// value of the global NodeCount, which is then incremented.
// Callers reset NodeCount before starting the walk.
static void
CountNodes(struct MyNode *MyNode) {
	if (!MyNode) {
		return;
	}
	CountNodes(MyNode->Left);
	if (MyNode->Title) {
		MyNode->Num = NodeCount++;
	}
	CountNodes(MyNode->Right);
}
#ifdef MPINDEXPRINT
// Debug aid: print the node tree in order.  Each node is shown as
// "Name - Title<TAB>Count" followed by one indented line per linked word.
static void
PrintNode(struct MyNode *MyNode) {
	struct Link *ref;

	if (!MyNode) {
		return;
	}
	PrintNode(MyNode->Left);
	printf("%s - %s\t%d\n",MyNode->Name,MyNode->Title,MyNode->Count);
	for (ref = MyNode->FirstLink; ref; ref = ref->NextNodeLink) {
		printf("\t%s\t%d\n",ref->Word->Word,ref->Count);
	}
	PrintNode(MyNode->Right);
}
#endif
// Write the word tree to fp in tree order.
// A word is written when its Count is non-zero and, if max is non-zero,
// does not exceed max.  Output is "!<word>" followed by one line holding the
// Num of each node on the word's link list.
static void
WriteWords(struct Word *Word,FILE *fp,ULONG max) {
	struct Link *ref;

	if (!Word) {
		return;
	}
	WriteWords(Word->Left,fp,max);
	if (Word->Count && ((0 == max) || (Word->Count <= max))) {
		fprintf(fp,"!%s\n",Word->Word);
		for (ref = Word->FirstLink; ref; ref = ref->NextWordLink) {
			fprintf(fp,"%d\n",ref->MyNode->Num);
		}
	}
	WriteWords(Word->Right,fp,max);
}
#ifdef MPINDEXPRINT
// Debug aid: print the word tree in order.  Each word is shown as
// "Word - <word><TAB>Count" followed by one indented line per linked node.
static void
PrintWord(struct Word *Word) {
	struct Link *ref;

	if (!Word) {
		return;
	}
	PrintWord(Word->Left);
	printf("Word - %s\t%d\n",Word->Word,Word->Count);
	for (ref = Word->FirstLink; ref; ref = ref->NextWordLink) {
		printf("\t%s\t%d\n",ref->MyNode->Title,ref->Count);
	}
	PrintWord(Word->Right);
}
#endif
// Non-zero while the text being stripped is inside an unfinished "@{...}"
// command; kept between calls so a command may span several lines.
static int InCurly = 0;
// Remove "@{...}" commands from buffer in place.
// For a command of the form @{"label" ...} the label text is kept; for any
// other command everything from "@{" up to and including '}' is dropped.
static void
Strip(char *buffer) {
	char *dst = buffer;
	char *src = buffer;

	while (*src) {
		if (InCurly) {
			// Discard up to and including the closing brace, if present
			while (*src && ('}' != *src)) {
				++src;
			}
			if ('}' == *src) {
				++src;
				InCurly = 0;
			}
			continue;
		}
		if (('@' != src[0]) || ('{' != src[1])) {
			// Ordinary text is kept
			*dst++ = *src++;
			continue;
		}
		// Start of a command: the remainder is skipped on the next pass
		InCurly = 1;
		if ('"' == src[2]) {
			// Keep the quoted label
			for (src += 3; *src && ('"' != *src); ++src) {
				*dst++ = *src;
			}
		}
	}
	*dst = 0;
}
// Write one "+<name>" line to fp for every entry of files.
//
// files - NULL-terminated array of file names; may itself be NULL (main
//         passes the FROM argument, which is absent when no file was given),
//         in which case nothing is written
// fp    - output stream
void
WriteFiles(char **files,FILE *fp) {
	int i;

	if (!files) {
		return;
	}
	for (i=0; files[i]; ++i) {
		fprintf(fp,"+%s\n",files[i]);
	}
}
// Insert MyNode into the node tree rooted at TopNode.
// The tree is keyed on Name (stricmp): a node goes Left when the existing
// node's Name compares greater, otherwise Right, so equal names are allowed.
static void
AddNode(struct MyNode *MyNode) {
	struct MyNode **slot = &TopNode;

	while (*slot) {
		if (stricmp((*slot)->Name,MyNode->Name) > 0) {
			slot = &(*slot)->Left;
		}
		else {
			slot = &(*slot)->Right;
		}
	}
	*slot = MyNode;
}
// Search the subtree rooted at MyNode for the titled node whose Num is i.
// The node itself is checked first, then the left and the right subtree.
// Returns the matching node, or NULL when the subtree holds no such node.
static struct MyNode *
FindNode(struct MyNode *MyNode, int i) {
	struct MyNode *Node1;

	if (!MyNode) {
		return NULL;
	}
	if (MyNode->Title && (MyNode->Num == i)) {
		return MyNode;
	}
	if ((Node1 = FindNode(MyNode->Left,i)) != NULL) {
		return Node1;
	}
	if ((Node1 = FindNode(MyNode->Right,i)) != NULL) {
		return Node1;
	}
	// Not found here: return a defined value so the callers' tests work
	return NULL;
}
// Add the word p to the ignore tree with the given ReallyIgnore flag.
// If the word is already present, the existing entry has ReallyIgnore set to
// TRUE and the freshly built entry is simply left unused in the pool.
static void
AddIgnore(char *p,BOOL ignore) {
	struct IgnoreWord **slot = &TopIgnore;
	struct IgnoreWord *fresh;
	int cmp;

	// Built up front; it may not get used if the word is a duplicate
	fresh = Mycalloc(sizeof(struct IgnoreWord));
	fresh->Word = Mystrdup(p);
	fresh->ReallyIgnore = ignore;

	while (*slot) {
		cmp = stricmp((*slot)->Word,fresh->Word);
		if (0 == cmp) {
			// duplicate - must be adding a real ignore
			(*slot)->ReallyIgnore = TRUE;
			return;
		}
		slot = (cmp > 0) ? &(*slot)->Left : &(*slot)->Right;
	}
	*slot = fresh;
}
// Argument template handed to ReadArgs in main.  The debugging build
// (MPINDEXPRINT defined) accepts an additional PRINT switch.
#ifdef MPINDEXPRINT
#define TEMPLATE "TO/A,MERGE/K,IGNORE/K,MAXNODE/K/N,NODE/S,FROM/M,PRINT/S"
#else
#define TEMPLATE "TO/A,MERGE/K,IGNORE/K,MAXNODE/K/N,NODE/S,FROM/M"
#endif
// Indices into the opts array, in the same order as the template entries
#define OPT_TO 0
#define OPT_MERGE 1
#define OPT_IGNORE 2
#define OPT_MAX 3
#define OPT_NODE 4
#define OPT_FROM 5
// OPT_COUNT is the number of entries and sizes the opts array in main
#ifdef MPINDEXPRINT
#define OPT_PRINT 6
#define OPT_COUNT 7
#else
#define OPT_COUNT 6
#endif
// The 31 one-character keys that main inserts, in this order, into the word,
// node and ignore trees before any real data is added ("Try and balance
// trees").  The diagram below is the author's sketch of the intended tree
// shape over the alphabet line.
static char Balance[32]="PHXDLT1BFJNRVZ3ACEGIKMOQSUWY024";
// 1234567890123456789012345678901
//ABCDEFGHIJKLMNOPQRSTUVWXYZ01234"
// X
// X X
// X X X X
// X X X X X X X X
//X X X X X X X X X X X X X X X X
// Remove the trailing newline, if there is one, from a line read by fgets().
// Unlike an unconditional "s[strlen(s)-1] = 0" this is safe for an empty
// string and does not eat the last character of an unterminated final line.
static void
ChopNewline(char *s) {
	size_t len = strlen(s);

	if (len && ('\n' == s[len-1])) {
		s[len-1] = 0;
	}
}

// Program entry point.
//
// Arguments are parsed with ReadArgs according to TEMPLATE:
//   TO      - index file to write (required)
//   MERGE   - existing index file whose files, nodes and words are read first
//   IGNORE  - file with one word per line that must not be indexed
//   MAXNODE - passed to WriteWords as the maximum Count of a written word
//   NODE    - use the node name as the title instead of the first text line
//   FROM    - files to scan for "@node" ... "@endnode" sections
// Returns 0 on success and 10 when a file could not be opened or the
// arguments could not be parsed.
int
main(int argc, char **argv) {
	long opts[OPT_COUNT] = {
		0
	};
	FILE *in,*out;
	char *p,*p1,*p2;
	struct MyNode *MyNode,*Node1;
	struct Word *Word = NULL;
	char buffer[257];
	char bbuf[2];
	int i, resx=0;
	int merged=0;
	int seq;
	char *t;
	char *file;

	if (!(rdargs = ReadArgs((char *)TEMPLATE, opts, NULL))) {
		PrintFault(IoErr(), NULL);
		return 10;
	}
	Chain = CreatePool(MEMF_CLEAR,PUDDLE,THRESH);
	if (!Chain) {
		printf("Error allocating memory pool\n");
		FreeArgs(rdargs);
		exit(10);
	}
	// Try and balance trees
	bbuf[1]=0;
	for (i=0; i<31; ++i) {
		bbuf[0]=Balance[i];
		AddWord(bbuf);
		MyNode = Mycalloc(sizeof(struct MyNode));
		// Each placeholder needs its own copy of the key: pointing Name at
		// bbuf would make all 31 nodes share one (changing) string
		MyNode->Name = Mystrdup(bbuf);
		AddNode(MyNode);
		AddIgnore(bbuf,FALSE);
	}
	if ((out=fopen((char *)opts[OPT_TO],"w"))) {
		if (opts[OPT_IGNORE]) {
			if ((in = fopen((char *)(opts[OPT_IGNORE]),"r"))) {
				// Add each line as read; the word must be added before the
				// next fgets overwrites the buffer or returns NULL
				while (fgets(buffer,256,in)) {
					ChopNewline(buffer);
					if (buffer[0]) {
						AddIgnore(buffer,TRUE);
					}
				}
				fclose(in);
			}
			else {
				printf("Error opening %s\n",(char*)(opts[OPT_IGNORE]));
				resx=10;
			}
		}
		if (opts[OPT_MERGE]) {
			if ((in=fopen((char*)(opts[OPT_MERGE]),"r"))) {
				p = fgets(buffer,256,in);
				while (p) {
					// First part: "+file" lines and "seq+name" / "title" pairs
					while (p && (buffer[0] != '!')) {
						ChopNewline(buffer);
						if ('+' == buffer[0]) {
							file = Mystrdup(&(buffer[1]));
							fprintf(out,"+%s\n",file);
							++merged;
						}
						else {
							seq = 0;
							t = buffer;
							// Stop at the end of the line too, in case the
							// '+' separator is missing
							while (*t && ('+' != *t)) {
								seq *= 10;
								seq += (*t++ -'0');
							}
							if ('+' == *t) {
								++t;
							}
							MyNode = Mycalloc(sizeof(struct MyNode));
							MyNode->Name = Mystrdup(t);
							MyNode->seq = seq;
							p = fgets(buffer,256,in);
							if (!p) {
								// Truncated file: node line without a title
								break;
							}
							ChopNewline(buffer);
							MyNode->Title=Mystrdup(p);
							AddNode(MyNode);
						}
						p = fgets(buffer,256,in);
					}
					NodeCount = 0;
					CountNodes(TopNode);
					// Second part: "!word" lines followed by node numbers
					while (p) {
						ChopNewline(buffer);
						if ('!' == buffer[0]) {
							Word = AddWord(&(buffer[1]));
						}
						else {
							seq = 0;
							t = buffer;
							while (*t) {
								seq *= 10;
								seq += (*t++ -'0');
							}
							Node1 = FindNode(TopNode,seq);
							// Ignore references to nodes that do not exist
							if (Node1 && Word) {
								LinkNodeWord(Node1,Word);
							}
						}
						p = fgets(buffer,256,in);
					}
				}
				fclose(in);
			}
			else {
				printf("Error opening %s\n",(char*)(opts[OPT_MERGE]));
				resx=10;
			}
		}
		for (i=0; opts[OPT_FROM] && ((char**)(opts[OPT_FROM]))[i]; ++i) {
			file = Mystrdup(((char**)(opts[OPT_FROM]))[i]);
			if ((in=fopen(file,"r"))) {
				p = fgets(buffer,256,in);
				while (p) {
					// Skip forward to the next "@node " line
					while (p && strnicmp(p,"@node ",6)) {
						p = fgets(buffer,256,in);
					}
					if (p) {
						Strip(buffer);
						MyNode = Mycalloc(sizeof(struct MyNode));
						MyNode->seq = i+merged;
						p1 = &(buffer[6]);
						while ((*p1) && isspace((unsigned char)*p1)) {
							++p1;
						}
						if (*p1) {
							// Isolate the node name, without surrounding quotes
							if ('"' == *p1) {
								++p1;
							}
							p2 = p1;
							while ((*p2) && !isspace((unsigned char)*p2)) {
								++p2;
							}
							--p2;
							if ('"' != *p2) {
								++p2;
							}
							*p2 = 0;
							MyNode->Name = Mystrdup(p1);
							if (stricmp(MyNode->Name,"index")) {
								// The title is the first line that is neither
								// a command nor blank
								p = fgets(buffer,256,in);
								while (p && ((Strip(buffer),('@' == *p)) ||
								             isspace((unsigned char)*p) || ('\n' == *p))) {
									p = fgets(buffer,256,in);
								}
								if (p) {
									ChopNewline(p);
									if (opts[OPT_NODE]) {
										MyNode->Title = Mystrdup(MyNode->Name);
									}
									else {
										MyNode->Title = Mystrdup(p);
									}
									AddNode(MyNode);
									// Index every line up to "@endnode"
									// (the title line included)
									while (p && strnicmp(p,"@endnode",8)) {
										AddWords(MyNode,buffer);
										if ((p = fgets(buffer,256,in))) {
											Strip(buffer);
										}
									}
								}
							}
							else {
								// A node named "index" is not indexed.  MyNode
								// came from the memory pool, so it must not be
								// handed to free(); DeletePool reclaims it.
								while (p && strnicmp(p,"@endnode",8)) {
									p = fgets(buffer,256,in);
								}
							}
						}
						p = fgets(buffer,256,in);
					}
				}
				fclose(in);
			}
			else {
				printf("Error opening %s\n",((char**)(opts[OPT_FROM]))[i]);
				resx=10;
			}
		}
		NodeCount = 0;
		CountNodes(TopNode);
#ifdef MPINDEXPRINT
		if (opts[OPT_PRINT]) {
			PrintNode(TopNode);
			PrintWord(TopWord);
		}
#endif
		// FROM is optional, so there may be no file list to write
		if (opts[OPT_FROM]) {
			WriteFiles((char **)opts[OPT_FROM],out);
		}
		WriteNodes(TopNode,out);
		WriteWords(TopWord,out,opts[OPT_MAX]?*((ULONG *)opts[OPT_MAX]):0);
		fclose(out);
	}
	else {
		printf("Error opening %s\n",(char *)opts[OPT_TO]);
		resx = 10;
	}
	FreeArgs(rdargs);
	DeletePool(Chain);
	return resx;
}